import fs from 'fs';
import puppeteer from 'puppeteer';

(async () => {

    // Build the pinyin -> display-name lookup used to resolve the city in each URL.
    // NOTE(review): the file content is executed with eval(), so cityPinyin.txt must
    // be a trusted file. If it is (or can be made) strict JSON, JSON.parse would be
    // the safer choice — confirm the file format before switching.
    const cityListSource = fs.readFileSync("cityPinyin.txt").toString();
    const cityNames = eval(cityListSource);
    const pinyinName = new Map();
    // Each entry is read for its `py` (key) and `name` (value) properties.
    for (const {py, name} of cityNames) {
        pinyinName.set(py, name);
    }

    /**
     * Derives the city display name and the date token from one URL line.
     *
     * Expected shape: http://lishi.tianqi.com/suyouqi/201104.html
     * The first path segment is looked up in `pinyinName`; the second segment,
     * with ".html" removed, becomes the date.
     *
     * @param {string} line - One line from the URL list; surrounding whitespace
     *     (including a trailing "\r") is ignored.
     * @returns {{cityName: (string|undefined), date: (string|undefined)}}
     *     `cityName` is undefined when the pinyin is not in `pinyinName`. Both
     *     fields are undefined when the line has no second path segment (for
     *     example a blank line), so callers can skip it instead of crashing.
     */
    function getCityEntity(line) {
        const [cityPinyin, monthPage] = line
            .trim()
            .replaceAll("http://lishi.tianqi.com/", "")
            .split("/");

        // A blank or malformed line has no "<date>.html" segment to parse.
        if (monthPage === undefined) {
            return {"cityName": undefined, "date": undefined};
        }

        return {
            "cityName": pinyinName.get(cityPinyin),
            "date": monthPage.replace(".html", ""),
        };
    }

    /**
     * Creates a promise that settles after a timer fires, for use with `await`.
     *
     * @param {number} ms - Delay handed to setTimeout, in milliseconds.
     * @returns {Promise<undefined>} Resolves (never rejects) once the timer fires.
     */
    function sleep(ms) {
        return new Promise((wake) => {
            setTimeout(wake, ms);
        });
    }

    // One target URL per line. Lines are trimmed and blank ones dropped so that a
    // trailing newline or CRLF line endings do not produce empty or "\r"-suffixed
    // entries that would later be parsed and navigated to.
    const urls = fs.readFileSync("citiMonthUrl.txt")
        .toString()
        .split("\n")
        .map(line => line.trim())
        .filter(line => line !== "");


    // A single browser and a single page are created once and reused for every URL.
    const browser = await puppeteer.launch();

    const page = await browser.newPage();
    // Default timeout (ms) for this page's waiting operations; per Puppeteer's
    // documentation this covers navigation and waitFor* calls.
    page.setDefaultTimeout(1500);
    // Element that must be present before a page is scraped.
    const resultSelector = ".tian_three";

    /**
     * Opens `url` in the shared page, scrapes the list under `.tian_three`, and
     * writes it to out/<cityName>/<date>.csv (overwriting any existing file).
     *
     * Each `li` of the list becomes one CSV line built from the trimmed innerHTML
     * of its first five child `div`s, joined by commas.
     *
     * @param {string} url - Page to load.
     * @param {{cityName: string, date: string}} city - Supplies the output
     *     directory name and the file name.
     * @returns {Promise<void>}
     * @throws Propagates navigation/selector timeouts from the page and any
     *     file-system error raised while writing.
     */
    async function extract(url, city) {
        await page.goto(url);
        await page.waitForSelector(resultSelector);

        // Runs inside the browser page; only the returned string crosses back.
        const csv = await page.evaluate(() => {
            const rowCount = document.querySelectorAll(".tian_three > .thrui > li").length;
            const lines = [];
            for (let j = 1; j <= rowCount; j++) {
                const cells = [];
                for (let i = 1; i <= 5; i++) {
                    // nth-child is 1-based: row j, column i.
                    const matches = [...document.querySelectorAll(`.thrui > li:nth-child(${j}) > div:nth-child(${i})`)]
                        .map(div => div.innerHTML.trim());
                    cells.push(matches);
                }
                lines.push(cells.join(","));
            }
            return lines.join("\n");
        });

        // `recursive` creates the parent "out" directory too and is a no-op when
        // the directory already exists, so no separate existence check is needed.
        const outDir = `out/${city.cityName}/`;
        fs.mkdirSync(outDir, {recursive: true});
        fs.writeFileSync(`${outDir}${city.date}.csv`, csv || "", {flag: 'w'});
    }

    // Retry policy for a single URL: number of retries after the first failed
    // attempt, and the pause before each retry.
    const maxRetries = 7;
    const retryDelayMs = 3000;
    // Upper bound on full passes over the URL list, so that URLs which fail
    // permanently cannot keep the crawler re-running forever.
    const maxPasses = 3;

    // True when the most recent pass left at least one URL unfetched.
    let hasErr = false;

    /**
     * Chooses the pause (ms) inserted after each fetch attempt: a short fixed
     * pause when the local hour is before 8 or from 20 onwards, otherwise a
     * randomised pause in [1500, 1700).
     * NOTE(review): the window was previously described as 23:00–08:00 while the
     * condition uses 20 — confirm the intended start hour.
     */
    function pickSleepTime() {
        const hours = new Date().getHours();
        if (hours < 8 || hours >= 20) {
            return 150;
        }
        return Math.floor(Math.random() * 200 + 1500);
    }

    /**
     * Appends a URL that exhausted its retries to error-urls.txt. A failure to
     * write the log is reported but does not stop the crawl.
     */
    function recordFailedUrl(url) {
        try {
            fs.appendFileSync("error-urls.txt", url + "\n");
        } catch (e) {
            console.error(`写入 error-urls.txt 失败: ${e?.message ?? e}`);
        }
        console.error(`${url} 错误`);
    }

    /**
     * Fetches one URL, retrying up to `maxRetries` times after a failure.
     * @returns {Promise<boolean>} true when some attempt succeeded.
     */
    async function extractWithRetry(url, city) {
        try {
            await extract(url, city);
            return true;
        } catch (e) {
            console.error("发生错误，等待(3000ms)后重试", e?.message ?? e);
        }

        for (let attempt = 1; attempt <= maxRetries; attempt++) {
            await sleep(retryDelayMs);
            try {
                await extract(url, city);
                console.log("重试成功");
                return true;
            } catch (e) {
                // Keep going, but leave a trace of why this attempt failed.
                console.error(`第${attempt}次重试失败:`, e?.message ?? e);
            }
        }
        return false;
    }

    /**
     * Walks the whole URL list once, skipping unknown cities and months whose
     * CSV already exists.
     * @returns {Promise<boolean>} true when at least one URL could not be fetched.
     */
    async function crawlAll() {
        let failed = false;

        for (const url of urls) {
            const city = getCityEntity(url);
            if (city.cityName === undefined) {
                console.log("城市名为空");
                continue;
            }
            // Already downloaded (possibly in an earlier pass or run).
            if (fs.existsSync(`out/${city.cityName}/${city.date}.csv`)) {
                continue;
            }

            console.log(`抓取 ${city.cityName} ${url}`);
            try {
                const ok = await extractWithRetry(url, city);
                if (!ok) {
                    failed = true;
                    console.error(`重试${maxRetries}次异常，写入日志`);
                    recordFailedUrl(url);
                }
            } finally {
                // Pause after every attempted URL, whether it succeeded or not.
                const sleepTime = pickSleepTime();
                console.log("休息时间(ms) ", sleepTime);
                await sleep(sleepTime);
            }
        }

        return failed;
    }

    try {
        // Later passes only touch URLs whose CSV is still missing.
        for (let pass = 1; pass <= maxPasses; pass++) {
            hasErr = await crawlAll();
            if (!hasErr) {
                break;
            }
            if (pass < maxPasses) {
                console.log("有错误，补漏");
            }
        }
    } finally {
        // Always release the browser process, even if the crawl throws.
        await browser.close();
    }

})();

